Load Packages
library(Seurat)
## Attaching SeuratObject
## Attaching sp
library(magrittr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(ggplot2)
library(cowplot)
Load Data. This data contains only Endothelial cells. These have been selected from a larger object.
Samples include Lean, Obese and Unhealthy Obese this is “condition”
endo_cells <- readRDS(file ="/home/lucamannino/R_Projects/Intagration_endoCellAtlas/endo_cells.RDS")
We build a new UMAP plot to look at the data We use the integrated assay for this step
DefaultAssay(endo_cells) <- "integrated"
endo_cells <- FindNeighbors(endo_cells, dims = 1:25)
## Computing nearest neighbor graph
## Computing SNN
endo_cells <- RunUMAP(endo_cells, dims = 1:25)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 12:47:07 UMAP embedding parameters a = 0.9922 b = 1.112
## 12:47:07 Read 27325 rows and found 25 numeric columns
## 12:47:07 Using Annoy for neighbor search, n_neighbors = 30
## 12:47:07 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 12:47:12 Writing NN index file to temp file /tmp/RtmpJ3HRca/file27e44f6445c235
## 12:47:12 Searching Annoy index using 1 thread, search_k = 3000
## 12:47:22 Annoy recall = 100%
## 12:47:23 Commencing smooth kNN distance calibration using 1 thread
## 12:47:24 Initializing from normalized Laplacian + noise
## 12:47:25 Commencing optimization for 200 epochs, with 1313600 positive edges
## 12:47:37 Optimization finished
endo_cells <- FindClusters(endo_cells, resolution = 0.8)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 27325
## Number of edges: 1089437
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8631
## Number of communities: 15
## Elapsed time: 5 seconds
There are several patients included in this study, the patients labels are recorded under endo_cells@meta.data[[“orig.ident”]].
Patients have been categorized by their condition: Obese, Unhelthy Obese and Lean (endo_cells@meta.data[[“condition”]]).
From each patient subcutaneous and visceral cells were analysed, which cells belong to which tissue is recorded under endo_cells@meta.data[[“type”]]
DimPlot(endo_cells, label=TRUE, group.by = "type")
DimPlot(endo_cells, label=TRUE, group.by = "condition")
DimPlot(endo_cells, label=TRUE, group.by = "orig.ident")
In the following plot we look at specific markers to further clean the object from non endothelial cells For this we use the SCT assay
DefaultAssay(endo_cells) <- "SCT"
FeaturePlot(endo_cells, c("PECAM1","CDH5","CLU","VWF"))
DotPlot(endo_cells, features = c("PECAM1","CDH5","CLU","VWF"), assay="SCT")
DotPlot(endo_cells, features = c("FLRT2","ACSL4","COL8A1","CACNB2"), assay="SCT")
There are several patients included in this study, the patients labels are recorded under endo_cells@meta.data[[“orig.ident”]].
Patients have been categorized by their condition: Obese, Unhelthy Obese and Lean (endo_cells@meta.data[[“condition”]]).
From each patient subcutaneous and visceral cells were analysed, which cells belong to which tissue is recorded under endo_cells@meta.data[[“type”]]
DimPlot(endo_cells, label=TRUE, group.by = "type")
DimPlot(endo_cells, label=TRUE, group.by = "condition")
DimPlot(endo_cells, label=TRUE, group.by = "orig.ident")
we remove clusters 0,1,2,13 as they are not endothelial
EndoCells1 <- subset(endo_cells, idents = c(3,4,5,6,7,8,9,10,11,12,14))
New Umap plot is produced
DefaultAssay(EndoCells1) <- "integrated"
EndoCells1 <- FindNeighbors(EndoCells1, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
EndoCells1 <- RunUMAP(EndoCells1, dims = 1:20)
## 12:48:06 UMAP embedding parameters a = 0.9922 b = 1.112
## 12:48:06 Read 13643 rows and found 20 numeric columns
## 12:48:06 Using Annoy for neighbor search, n_neighbors = 30
## 12:48:06 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 12:48:09 Writing NN index file to temp file /tmp/RtmpJ3HRca/file27e44f1ea932e1
## 12:48:09 Searching Annoy index using 1 thread, search_k = 3000
## 12:48:14 Annoy recall = 100%
## 12:48:14 Commencing smooth kNN distance calibration using 1 thread
## 12:48:15 Initializing from normalized Laplacian + noise
## 12:48:16 Commencing optimization for 200 epochs, with 643354 positive edges
## 12:48:22 Optimization finished
EndoCells1 <- FindClusters(EndoCells1, resolution = 3.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 13643
## Number of edges: 558064
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7377
## Number of communities: 43
## Elapsed time: 1 seconds
Check for immune cells
DefaultAssay(EndoCells1) <- "integrated"
EndoCells1 <- FindClusters(EndoCells1, resolution = 1.0)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 13643
## Number of edges: 558064
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8496
## Number of communities: 15
## Elapsed time: 1 seconds
DefaultAssay(EndoCells1) <- "SCT"
FeaturePlot(EndoCells1, c("CD163","MRC1","PTPRC"), pt.size = 0.1,reduction ="umap",min.cutoff=0)
FeaturePlot(EndoCells1, c("CDH5","PECAM1","LYVE1","PROX1"), pt.size = 0.1,reduction ="umap",min.cutoff=0)
DimPlot(EndoCells1, label=TRUE)
DotPlot(EndoCells1,features = c("CD163","MRC1","PTPRC","CDH5","PECAM1","LYVE1","PROX1"), assay = "SCT",cols = c("blue", "red"))
DimPlot(EndoCells1, label=TRUE, group.by = "type")
DimPlot(EndoCells1, label=TRUE, group.by = "condition")
DimPlot(EndoCells1, label=TRUE, group.by = "orig.ident")
removing clusters 7 and 8 and 14
EndoCells2 <- subset(EndoCells1, idents = c(0,1,2,3,4,5,6,9,10,11,12,13))
In this step we decided to carry out a new SCT transform and a new integration in order to obtain better results. As we have removed a lot of cells from the original SCT and integration assay it is usefull to run the integration and the normalisation again.
To acheve this we split the object in its original samples, run SCT transform on the “SOUP” assay. We will then integrate the object by sample.
EndoCells2.list <- SplitObject(EndoCells2, split.by = "orig.ident")
for (i in 1:length(EndoCells2.list)) {
EndoCells2.list[[i]] <- SCTransform(EndoCells2.list[[i]], assay = "soup", variable.features.n = 3000, verbose = FALSE)
}
need to use: options(future.globals.maxSize = 2500 * 1024^2) in order to increase the maximum allowed size for the integration algorithm
options(future.globals.maxSize = 2500 * 1024^2)
EndoCells2.list.features <- SelectIntegrationFeatures(object.list = EndoCells2.list, nfeatures = 3000)
EndoCells2.list <- PrepSCTIntegration(object.list = EndoCells2.list, anchor.features = EndoCells2.list.features,
verbose = FALSE)
options(future.globals.maxSize = 2500 * 1024^2)
EndoCells2.list.anchors <- FindIntegrationAnchors(object.list = EndoCells2.list, normalization.method = "SCT",
anchor.features = EndoCells2.list.features, verbose = FALSE)
EndoCells2.list.integrated <- IntegrateData(anchorset = EndoCells2.list.anchors, normalization.method = "SCT",
verbose = FALSE)
We produce a new UMAP plot from the integrated data
DefaultAssay(EndoCells2.list.integrated) <- "integrated"
EndoCells2.list.integrated <- RunPCA(EndoCells2.list.integrated, npcs = 40, verbose = FALSE)
EndoCells2.list.integrated <- FindNeighbors(EndoCells2.list.integrated, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
EndoCells2.list.integrated <- RunUMAP(EndoCells2.list.integrated, dims = 1:20)
## 13:02:52 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:02:52 Read 11622 rows and found 20 numeric columns
## 13:02:52 Using Annoy for neighbor search, n_neighbors = 30
## 13:02:52 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:02:54 Writing NN index file to temp file /tmp/RtmpJ3HRca/file27e44f4814034c
## 13:02:54 Searching Annoy index using 1 thread, search_k = 3000
## 13:02:57 Annoy recall = 100%
## 13:02:57 Commencing smooth kNN distance calibration using 1 thread
## 13:02:58 Initializing from normalized Laplacian + noise
## 13:02:59 Commencing optimization for 200 epochs, with 514826 positive edges
## 13:03:04 Optimization finished
EndoCells2.list.integrated <- FindClusters(EndoCells2.list.integrated, resolution = 0.30)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 11622
## Number of edges: 473471
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9103
## Number of communities: 8
## Elapsed time: 1 seconds
QC the new object
DefaultAssay(EndoCells2.list.integrated) <- "SCT"
DimPlot(EndoCells2.list.integrated, label=TRUE, group.by = "type")
DimPlot(EndoCells2.list.integrated, label=TRUE, group.by = "condition")
DimPlot(EndoCells2.list.integrated, label=TRUE, group.by = "orig.ident")
QC checks to see if we have any non endothelial cells left
DefaultAssay(EndoCells2.list.integrated) <- "SCT"
FeaturePlot(EndoCells2.list.integrated, c("CD163","MRC1","PTPRC"), pt.size = 0.1,reduction ="umap",min.cutoff=0)
FeaturePlot(EndoCells2.list.integrated, c("CDH5","PECAM1","LYVE1","PROX1"), pt.size = 0.1,reduction ="umap",min.cutoff=0)
FeaturePlot(EndoCells2.list.integrated, c("CDH5"), pt.size = 0.1,reduction ="umap",min.cutoff=0)
DimPlot(EndoCells2.list.integrated, label=TRUE)
DotPlot(EndoCells2.list.integrated,features = c("CD163","MRC1","PTPRC","CDH5","PECAM1", "VCAM1", "LYVE1","PROX1"), assay = "SCT",cols = c("blue", "red"))
We remove cluster 3 from the object
Strict2 <- subset(EndoCells2.list.integrated, idents = c(0,1,2,4,5,6,7))
DefaultAssay(Strict2) <- "integrated"
Strict2 <- FindNeighbors(Strict2, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
Strict2 <- RunUMAP(Strict2, dims = 1:20)
## 13:03:20 UMAP embedding parameters a = 0.9922 b = 1.112
## 13:03:20 Read 10236 rows and found 20 numeric columns
## 13:03:20 Using Annoy for neighbor search, n_neighbors = 30
## 13:03:20 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 13:03:21 Writing NN index file to temp file /tmp/RtmpJ3HRca/file27e44f5d3a1ebf
## 13:03:21 Searching Annoy index using 1 thread, search_k = 3000
## 13:03:24 Annoy recall = 100%
## 13:03:24 Commencing smooth kNN distance calibration using 1 thread
## 13:03:25 Initializing from normalized Laplacian + noise
## 13:03:25 Commencing optimization for 200 epochs, with 452802 positive edges
## 13:03:30 Optimization finished
Strict2 <- FindClusters(Strict2, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10236
## Number of edges: 415950
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8695
## Number of communities: 12
## Elapsed time: 1 seconds
DefaultAssay(Strict2) <- "integrated"
Strict2 <- FindClusters(Strict2, resolution = 0.35)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 10236
## Number of edges: 415950
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8929
## Number of communities: 8
## Elapsed time: 1 seconds
DefaultAssay(Strict2) <- "SCT"
DimPlot(Strict2, label=TRUE, group.by = "type")
DimPlot(Strict2, label=TRUE, group.by = "condition")
DimPlot(Strict2, label=TRUE, group.by = "orig.ident")
DimPlot(Strict2, label=TRUE)
DimPlot(Strict2, label=TRUE, split.by = "type")
FeaturePlot(Strict2, c("PROX1"), pt.size = 0.1,reduction ="umap",min.cutoff=0, split.by ="type" )
FeaturePlot(Strict2, c("CD163","MRC1","PTPRC"), pt.size = 0.1,reduction ="umap",min.cutoff=0)
FeaturePlot(Strict2, c("CDH5","PECAM1","LYVE1","PROX1"), pt.size = 0.1,reduction ="umap",min.cutoff=0)
FeaturePlot(Strict2, c("ARL15"), pt.size = 0.1,reduction ="umap",min.cutoff=0)
DimPlot(Strict2, label=TRUE)
DotPlot(Strict2,features = c("ARL15","MRC1","PTPRC","CDH5","PECAM1","LYVE1","PROX1"), assay = "SCT",cols = c("blue", "red"))
To label the endothelial subclusters we use Find all marker function, and then use this marker genes to appropiately label subclusters
DefaultAssay(Strict2) <- "SCT"
combined.sct <- PrepSCTFindMarkers(Strict2)
## Found 8 SCT models. Recorrecting SCT counts using minimum median counts: 1216.34073055455
combined.markers <- FindAllMarkers(combined.sct, assay = "SCT", verbose = FALSE, only.pos = TRUE, logfc.threshold = 0.05,)
## For a more efficient implementation of the Wilcoxon Rank Sum Test,
## (default method for FindMarkers) please install the limma package
## --------------------------------------------
## install.packages('BiocManager')
## BiocManager::install('limma')
## --------------------------------------------
## After installation of limma, Seurat will automatically use the more
## efficient implementation (no further action necessary).
## This message will be shown once per session
head(combined.markers, n = 15)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## PKHD1L1 0.000000e+00 1.3393360 0.427 0.084 0.000000e+00 0 PKHD1L1
## RELN 0.000000e+00 1.0745094 0.360 0.071 0.000000e+00 0 RELN
## MMRN1 0.000000e+00 0.9988334 0.384 0.071 0.000000e+00 0 MMRN1
## PIEZO2 4.648381e-308 0.7442944 0.329 0.055 1.057600e-303 0 PIEZO2
## DOCK5 7.499172e-308 0.8076344 0.378 0.084 1.706212e-303 0 DOCK5
## PGM5 1.557859e-266 0.6269384 0.453 0.146 3.544440e-262 0 PGM5
## NRG3 3.081320e-260 0.8306883 0.319 0.064 7.010620e-256 0 NRG3
## VAV3 2.137763e-257 0.6256245 0.358 0.089 4.863838e-253 0 VAV3
## MPP7 8.685045e-255 0.6305471 0.276 0.045 1.976021e-250 0 MPP7
## PDE1A 1.074184e-241 0.9144833 0.343 0.088 2.443983e-237 0 PDE1A
## TFPI 1.054771e-240 0.7126664 0.625 0.305 2.399816e-236 0 TFPI
## TLL1 1.804908e-224 0.6305531 0.698 0.350 4.106526e-220 0 TLL1
## LYVE1 1.930487e-218 0.4650901 0.242 0.039 4.392244e-214 0 LYVE1
## PROX1 1.677030e-217 0.5227144 0.242 0.040 3.815578e-213 0 PROX1
## STAB2 7.975090e-213 0.3713843 0.206 0.025 1.814492e-208 0 STAB2
#We produce a table with top 50 marker genes per cluster to facilitate use
combined.markers %>%
group_by(cluster) %>%
slice_max(n = 50, order_by = avg_log2FC)
## # A tibble: 400 × 7
## # Groups: cluster [8]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0 1.34 0.427 0.084 0 0 PKHD1L1
## 2 0 1.07 0.36 0.071 0 0 RELN
## 3 1.50e-172 1.07 0.414 0.188 3.40e-168 0 EFNA5
## 4 0 0.999 0.384 0.071 0 0 MMRN1
## 5 1.07e-241 0.914 0.343 0.088 2.44e-237 0 PDE1A
## 6 1.24e-122 0.887 0.578 0.418 2.82e-118 0 PPFIBP1
## 7 3.08e-260 0.831 0.319 0.064 7.01e-256 0 NRG3
## 8 1.69e-203 0.817 0.417 0.15 3.84e-199 0 LINC02147
## 9 7.50e-308 0.808 0.378 0.084 1.71e-303 0 DOCK5
## 10 4.65e-308 0.744 0.329 0.055 1.06e-303 0 PIEZO2
## # … with 390 more rows
#Following code is to produce a heatmap of the 20 top marker genes per cluster
combined.markers %>%
group_by(cluster) %>%
top_n(n = 20, wt = avg_log2FC) -> top20
DoHeatmap(combined.sct, features = top20$gene) + NoLegend()
## Warning in DoHeatmap(combined.sct, features = top20$gene): The following
## features were omitted as they were not found in the scale.data slot for the SCT
## assay: LPP, PTEN, PKHD1L1
genes <- c(“EFNB2”,“ARL15”,“NKAIN2”,‘GJA5’,‘IGFBP3’,‘FBLN5’,‘CD36’,‘CADM2’,‘FABP4’,‘NRP1’,‘BTNL9’,‘PPARG’,‘COL4A1’,‘PDE3B’,‘AQP7’,‘PLIN1’,‘IGF1’,‘LPL’,‘ANGPT1’,‘ACSL1’,‘ZNF385D’,‘ADAMTS9’,‘HDAC9’,‘COL15A1’,‘VCAN’,‘LRRC1’,‘PLCXD3’,‘POSTN’,‘RYR3’,‘SELE’,‘RELN’,‘MMRN1’,‘STOX2’,‘NRG1’,‘CD163’,‘MRC1’,‘CDH1’,‘FLRT2’,‘ACSL4’,‘COL8A1’,‘CACNB2’)
genes <- c("EFNB2","ARL15","NKAIN2",'GJA5','IGFBP3','FBLN5','CD36','CADM2','FABP4','NRP1','BTNL9','PPARG','COL4A1','PDE3B','AQP7','PLIN1','IGF1','LPL','ANGPT1','ACSL1','ZNF385D','ADAMTS9','HDAC9','COL15A1','VCAN','LRRC1','PLCXD3','POSTN','RYR3','SELE','RELN','MMRN1','STOX2','NRG1','CD163','MRC1','CDH1','FLRT2','ACSL4','COL8A1','CACNB2')
DefaultAssay(Strict2) <- "SCT"
FeaturePlot(Strict2, genes)
DotPlot(Strict2,features = genes, assay = "SCT",cols = c("blue", "red"))+RotatedAxis()
FindSubCluster( object, cluster, graph.name, subcluster.name = “sub.cluster”, resolution = 0.5, algorithm = 1 )
DefaultAssay(Strict2) <- "integrated"
Strict3 <- FindSubCluster(Strict2, 0, subcluster.name = "sub.cluster", graph.name = "integrated_nn",
resolution = 0.2,
algorithm = 1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 3295
## Number of edges: 31455
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8250
## Number of communities: 13
## Elapsed time: 0 seconds
## 11 singletons identified. 2 final clusters.
FeaturePlot(Strict3, genes)
## Warning: Found the following features in more than one assay, excluding the
## default. We will not include these in the final data frame: PLIN1, SELE, CDH1
## Warning in FetchData.Seurat(object = object, vars = c(dims, "ident",
## features), : The following requested variables were not found: PLIN1, SELE, CDH1
DotPlot(Strict3,features = genes, assay = "SCT",cols = c("blue", "red"))+RotatedAxis()
DefaultAssay(Strict3) <- "SCT"
FeaturePlot(Strict3, c("CD163","MRC1","PTPRC"), pt.size = 0.1,reduction ="umap",min.cutoff=0)
FeaturePlot(Strict3, c("CDH5","PECAM1","LYVE1","PROX1"), pt.size = 0.1,reduction ="umap",min.cutoff=0)
FeaturePlot(Strict3, c("CDH5"), pt.size = 0.1,reduction ="umap",min.cutoff=0)
DimPlot(Strict3, label=TRUE, group.by="sub.cluster")
DotPlot(Strict3,features = c("CD163","MRC1","PTPRC","CDH5","PECAM1","LYVE1","PROX1"), assay = "SCT",cols = c("blue", "red"))
stem.combined <- RenameIdents(object = stem.combined,
0 = “your cell type”, 1 = “your other cell type”, 2 = “your last cell type”)
Idents(Strict3) <- "sub.cluster"
Strict3 <- RenameIdents(object = Strict3, "0_0" = "Lymphatic", "0_1" = "Vein_1", `1` = "Artery", "2"="Capillary", "3"="Vein_2","4"="Unknown","5"="Vein_3", "6"="Capillary/Arteriole","7"="Endo_MT")
DimPlot(Strict3, label=TRUE)
DimPlot(Strict3, label=FALSE, group.by = "type")
DimPlot(Strict3, label=FALSE, group.by = "condition")
DimPlot(Strict3, label=FALSE, group.by = "orig.ident")
DimPlot(Strict3, label=FALSE, split.by = "type")
DefaultAssay(Strict3) <- "SCT"
combined.Strict3 <- PrepSCTFindMarkers(Strict3)
## Found 8 SCT models. Recorrecting SCT counts using minimum median counts: 1216.34073055455
combined.markers.Strict3 <- FindAllMarkers(combined.Strict3, assay = "SCT", verbose = FALSE, only.pos = TRUE, logfc.threshold = 0.05,)
head(combined.markers, n = 15)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## PKHD1L1 0.000000e+00 1.3393360 0.427 0.084 0.000000e+00 0 PKHD1L1
## RELN 0.000000e+00 1.0745094 0.360 0.071 0.000000e+00 0 RELN
## MMRN1 0.000000e+00 0.9988334 0.384 0.071 0.000000e+00 0 MMRN1
## PIEZO2 4.648381e-308 0.7442944 0.329 0.055 1.057600e-303 0 PIEZO2
## DOCK5 7.499172e-308 0.8076344 0.378 0.084 1.706212e-303 0 DOCK5
## PGM5 1.557859e-266 0.6269384 0.453 0.146 3.544440e-262 0 PGM5
## NRG3 3.081320e-260 0.8306883 0.319 0.064 7.010620e-256 0 NRG3
## VAV3 2.137763e-257 0.6256245 0.358 0.089 4.863838e-253 0 VAV3
## MPP7 8.685045e-255 0.6305471 0.276 0.045 1.976021e-250 0 MPP7
## PDE1A 1.074184e-241 0.9144833 0.343 0.088 2.443983e-237 0 PDE1A
## TFPI 1.054771e-240 0.7126664 0.625 0.305 2.399816e-236 0 TFPI
## TLL1 1.804908e-224 0.6305531 0.698 0.350 4.106526e-220 0 TLL1
## LYVE1 1.930487e-218 0.4650901 0.242 0.039 4.392244e-214 0 LYVE1
## PROX1 1.677030e-217 0.5227144 0.242 0.040 3.815578e-213 0 PROX1
## STAB2 7.975090e-213 0.3713843 0.206 0.025 1.814492e-208 0 STAB2
#We produce a table with top 50 marker genes per cluster to facilitate use
combined.markers.Strict3 %>%
group_by(cluster) %>%
slice_max(n = 50, order_by = avg_log2FC)
## # A tibble: 450 × 7
## # Groups: cluster [9]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr>
## 1 0 1.80 0.725 0.088 0 Lymphatic PKHD1L1
## 2 0 1.61 0.686 0.175 0 Lymphatic EFNA5
## 3 0 1.53 0.613 0.074 0 Lymphatic RELN
## 4 0 1.44 0.812 0.4 0 Lymphatic PPFIBP1
## 5 0 1.39 0.618 0.082 0 Lymphatic MMRN1
## 6 0 1.39 0.576 0.089 0 Lymphatic PDE1A
## 7 0 1.35 0.676 0.148 0 Lymphatic LINC02147
## 8 0 1.22 0.528 0.069 0 Lymphatic NRG3
## 9 0 1.19 0.569 0.111 0 Lymphatic LINC02208
## 10 0 1.19 0.628 0.089 0 Lymphatic DOCK5
## # … with 440 more rows
#Following code is to produce a heatmap of the 20 top marker genes per cluster
combined.markers.Strict3 %>%
group_by(cluster) %>%
top_n(n = 10, wt = avg_log2FC) -> top10
DoHeatmap(Strict3, features = top10$gene) + theme(text = element_text(size = 7.5))
## Warning in DoHeatmap(Strict3, features = top10$gene): The following features
## were omitted as they were not found in the scale.data slot for the SCT assay:
## PKHD1L1
DefaultAssay(Strict3) <- "SCT"
VlnPlot(object = Strict3, features = c('MMRN1',"TLL1","ARL15","CADM2","NCKAP5","RGS6"))
library(Seurat) library(ggplot2) library(cowplot)
pbmc <- readRDS(“data/pbmc_2k_v3_Seurat.rds”)
features <- c(“CD79A”, “MS4A1”, “CD8A”, “CD8B”, “LYZ”, “LGALS3”, “S100A8”, “GNLY”, “NKG7”, “KLRB1”, “FCGR3A”, “FCER1A”, “CST3”)
a <- VlnPlot(pbmc, features, stack = TRUE, sort = TRUE) + theme(legend.position = “none”) + ggtitle(“Identity on y-axis”)
b <- VlnPlot(pbmc, features, stack = TRUE, sort = TRUE, flip = TRUE) + theme(legend.position = “none”) + ggtitle(“Identity on x-axis”)
DefaultAssay(Strict3) <- "SCT"
features <- c('MMRN1',"TLL1","IGFBP3","CADM2","NCKAP5","RGS6","PDE3B")
a <- VlnPlot(Strict3, features, stack = TRUE, sort = TRUE) +
theme(legend.position = "none") + ggtitle("Identity on y-axis")
b <- VlnPlot(Strict3, features, stack = TRUE, sort = TRUE, flip = TRUE) +
theme(legend.position = "none") + ggtitle("Identity on x-axis")
b
DotPlot(Strict3,features = genes, assay = "SCT",cols = c("blue", "red"))+RotatedAxis()
saveRDS(Strict3, "endothelial.rds")
## extract meta data
mp <- Strict3@meta.data %>% as.data.table
# the resulting md object has one "row" per cell
## count the number of cells per unique combinations of "Sample" and "seurat_clusters"
mp[, .N, by = c("orig.ident", "seurat_clusters")]
## orig.ident seurat_clusters N
## 1: 13S 4 199
## 2: 13S 1 343
## 3: 13S 2 194
## 4: 13S 0 351
## 5: 13S 6 121
## 6: 13S 3 350
## 7: 13S 5 93
## 8: 13S 7 24
## 9: 13V 0 307
## 10: 13V 1 186
## 11: 13V 4 113
## 12: 13V 2 114
## 13: 13V 6 86
## 14: 13V 5 39
## 15: 13V 3 61
## 16: 13V 7 7
## 17: 9S 2 323
## 18: 9S 1 313
## 19: 9S 0 474
## 20: 9S 3 235
## 21: 9S 4 145
## 22: 9S 5 142
## 23: 9S 6 34
## 24: 9S 7 26
## 25: 9V 0 734
## 26: 9V 4 136
## 27: 9V 3 84
## 28: 9V 1 204
## 29: 9V 5 76
## 30: 9V 2 97
## 31: 9V 6 40
## 32: 9V 7 34
## 33: 10S 2 368
## 34: 10S 0 502
## 35: 10S 3 412
## 36: 10S 1 336
## 37: 10S 4 76
## 38: 10S 5 218
## 39: 10S 6 8
## 40: 10S 7 10
## 41: 10V 0 179
## 42: 10V 3 52
## 43: 10V 5 17
## 44: 10V 1 58
## 45: 10V 6 12
## 46: 10V 4 53
## 47: 10V 2 41
## 48: 10V 7 11
## 49: 17S 0 390
## 50: 17S 2 396
## 51: 17S 3 265
## 52: 17S 4 88
## 53: 17S 5 81
## 54: 17S 1 349
## 55: 17S 6 13
## 56: 17S 7 15
## 57: 17V 0 358
## 58: 17V 1 78
## 59: 17V 3 51
## 60: 17V 7 1
## 61: 17V 4 38
## 62: 17V 2 33
## 63: 17V 5 30
## 64: 17V 6 12
## orig.ident seurat_clusters N
## with additional casting after the counting
mp[, .N, by = c("orig.ident", "sub.cluster")] %>% dcast(., orig.ident ~ sub.cluster, value.var = "N")
## orig.ident 0_0 0_1 1 2 3 4 5 6 7
## 1: 10S 92 410 336 368 412 76 218 8 10
## 2: 10V 136 43 58 41 52 53 17 12 11
## 3: 13S 92 259 343 194 350 199 93 121 24
## 4: 13V 222 85 186 114 61 113 39 86 7
## 5: 17S 111 279 349 396 265 88 81 13 15
## 6: 17V 315 43 78 33 51 38 30 12 1
## 7: 9S 99 375 313 323 235 145 142 34 26
## 8: 9V 644 90 204 97 84 136 76 40 34
ggplot(Strict3@meta.data, aes(Strict3@meta.data$sub.cluster, fill = Strict3@meta.data$orig.ident))+geom_bar(stat="count")+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
summary(Strict.list[["Subcutaneous"]]@meta.data)
```r
saveRDS(Strict3, "endothelial.rds")
saveRDS(Strict3, "endothelial.rds")